import cudf
import cupy as cp
import plotly.graph_objects as go
import datashader as ds
import colorcet
import os
# Visualisation Imports
import numpy as np
import xarray as xr
# datashader
import datashader as ds
import datashader.transfer_functions as tf
from datashader.transfer_functions import shade
from datashader.transfer_functions import stack
from datashader.transfer_functions import dynspread
from datashader.transfer_functions import set_background
from datashader.transfer_functions import Images, Image
from datashader.colors import Elevation
from datashader.utils import orient_array
# holoviews
import holoviews as hv
from holoviews.plotting.plotly.dash import to_dash
from holoviews.element.tiles import CartoDark
from holoviews.operation.datashader import datashade, shade, dynspread, spread, rasterize
from holoviews.operation import decimate
# plotly
from plotly.colors import sequential
from plotly.subplots import make_subplots
# Dash Import
import dash
import dash_html_components as html
from jupyter_dash import JupyterDash
# Directory for the HYCOM data files (presumably the raw inputs — confirm);
# the path is relative, so it resolves against the current working directory.
DATA_PATH = '../../data/hycom'
# Directory for the HYCOM result files, also relative to the working directory.
RES_PATH = '../../results/hycom'
# Load the merged 2019-20 HYCOM/equinox CSV from RES_PATH into a cuDF DataFrame.
df = cudf.read_csv(os.path.join(RES_PATH, 'hycom_equinox_merged-201920.csv'))
# Preview the first rows of the loaded table.
df.head()
| start_date | lat | lon | water_temp_0 | salinity_0 | water_temp_2 | salinity_2 | water_temp_4 | salinity_4 | water_temp_6 | ... | xCO2_ATM_interpolated_ppm | PRES_EQU_hPa | PRES_ATM@SSP_hPa | TEMP_EQU_C | SST_C | SAL_permil | fCO2_SW@SST_uatm | fCO2_ATM_interpolated_uatm | dfCO2_uatm | WOCE_QC_FLAG | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-11-19 | 19.0418 | -87.4192 | 28.641375 | 36.191375 | 28.48175 | 36.194000 | 28.490375 | 36.193750 | 28.494625 | ... | 410.81 | 1014.72 | 1013.5662 | 29.14 | 29.1692 | 36.2848 | 410.02 | 393.66 | 16.36 | 2.0 |
| 1 | 2020-01-21 | 18.3305 | -65.1468 | 26.471600 | 35.826667 | 26.42980 | 35.825867 | 26.429333 | 35.828200 | 26.426533 | ... | 417.55 | 1015.40 | 1014.5055 | 27.25 | 27.3215 | 35.3726 | 390.25 | 402.13 | -11.88 | 2.0 |
| 2 | 2019-07-31 | 19.7638 | -87.1375 | 29.324125 | 36.082625 | 29.24825 | 36.083875 | 29.235125 | 36.083500 | 29.218875 | ... | 410.89 | 1014.90 | 1014.5565 | 29.98 | 29.7258 | 36.2400 | 435.94 | 393.63 | 42.32 | 2.0 |
| 3 | 2019-09-13 | 25.9173 | -79.9605 | 29.782625 | 36.122750 | 29.79925 | 36.123500 | 29.802750 | 36.124125 | 29.803750 | ... | 414.53 | 1012.47 | 1011.9442 | 29.88 | 29.9378 | 35.4666 | 413.24 | 395.84 | 17.40 | 2.0 |
| 4 | 2019-08-11 | 23.2150 | -83.5792 | 30.861500 | 36.154125 | 30.54200 | 36.152875 | 30.412500 | 36.150375 | 30.245500 | ... | 417.82 | 1017.40 | 1017.0594 | 30.57 | 30.6395 | 36.3900 | 442.06 | 400.40 | 41.66 | 2.0 |
5 rows × 26 columns
# Keep only the rows whose WOCE_QC_FLAG equals 2
# (presumably the "acceptable quality" flag — confirm against the dataset docs).
df = df[df['WOCE_QC_FLAG'] == 2]
# Preview the filtered table.
df.head()
| start_date | lat | lon | water_temp_0 | salinity_0 | water_temp_2 | salinity_2 | water_temp_4 | salinity_4 | water_temp_6 | ... | xCO2_ATM_interpolated_ppm | PRES_EQU_hPa | PRES_ATM@SSP_hPa | TEMP_EQU_C | SST_C | SAL_permil | fCO2_SW@SST_uatm | fCO2_ATM_interpolated_uatm | dfCO2_uatm | WOCE_QC_FLAG | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-11-19 | 19.0418 | -87.4192 | 28.641375 | 36.191375 | 28.48175 | 36.194000 | 28.490375 | 36.193750 | 28.494625 | ... | 410.81 | 1014.72 | 1013.5662 | 29.14 | 29.1692 | 36.2848 | 410.02 | 393.66 | 16.36 | 2.0 |
| 1 | 2020-01-21 | 18.3305 | -65.1468 | 26.471600 | 35.826667 | 26.42980 | 35.825867 | 26.429333 | 35.828200 | 26.426533 | ... | 417.55 | 1015.40 | 1014.5055 | 27.25 | 27.3215 | 35.3726 | 390.25 | 402.13 | -11.88 | 2.0 |
| 2 | 2019-07-31 | 19.7638 | -87.1375 | 29.324125 | 36.082625 | 29.24825 | 36.083875 | 29.235125 | 36.083500 | 29.218875 | ... | 410.89 | 1014.90 | 1014.5565 | 29.98 | 29.7258 | 36.2400 | 435.94 | 393.63 | 42.32 | 2.0 |
| 3 | 2019-09-13 | 25.9173 | -79.9605 | 29.782625 | 36.122750 | 29.79925 | 36.123500 | 29.802750 | 36.124125 | 29.803750 | ... | 414.53 | 1012.47 | 1011.9442 | 29.88 | 29.9378 | 35.4666 | 413.24 | 395.84 | 17.40 | 2.0 |
| 4 | 2019-08-11 | 23.2150 | -83.5792 | 30.861500 | 36.154125 | 30.54200 | 36.152875 | 30.412500 | 36.150375 | 30.245500 | ... | 417.82 | 1017.40 | 1017.0594 | 30.57 | 30.6395 | 36.3900 | 442.06 | 400.40 | 41.66 | 2.0 |
5 rows × 26 columns
Dropping date, lat and lon (along with easting, northing and the quality-flag column WOCE_QC_FLAG), since these are not expected to significantly impact the relationships
# Remove the date, position and quality-flag columns from df in place
# (axis=1 selects columns), leaving only the measurement columns.
df.drop(['start_date', 'lat', 'lon', 'WOCE_QC_FLAG', 'easting', 'northing'], axis=1, inplace=True)
# Preview the remaining columns.
df.head()
| water_temp_0 | salinity_0 | water_temp_2 | salinity_2 | water_temp_4 | salinity_4 | water_temp_6 | salinity_6 | water_temp_8 | salinity_8 | xCO2_EQU_ppm | xCO2_ATM_interpolated_ppm | PRES_EQU_hPa | PRES_ATM@SSP_hPa | TEMP_EQU_C | SST_C | SAL_permil | fCO2_SW@SST_uatm | fCO2_ATM_interpolated_uatm | dfCO2_uatm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 28.641375 | 36.191375 | 28.48175 | 36.194000 | 28.490375 | 36.193750 | 28.494625 | 36.193500 | 28.495625 | 36.193250 | 426.816 | 410.81 | 1014.72 | 1013.5662 | 29.14 | 29.1692 | 36.2848 | 410.02 | 393.66 | 16.36 |
| 1 | 26.471600 | 35.826667 | 26.42980 | 35.825867 | 26.429333 | 35.828200 | 26.426533 | 35.830333 | 26.422667 | 35.832533 | 403.554 | 417.55 | 1015.40 | 1014.5055 | 27.25 | 27.3215 | 35.3726 | 390.25 | 402.13 | -11.88 |
| 2 | 29.324125 | 36.082625 | 29.24825 | 36.083875 | 29.235125 | 36.083500 | 29.218875 | 36.083000 | 29.198750 | 36.082250 | 460.100 | 410.89 | 1014.90 | 1014.5565 | 29.98 | 29.7258 | 36.2400 | 435.94 | 393.63 | 42.32 |
| 3 | 29.782625 | 36.122750 | 29.79925 | 36.123500 | 29.802750 | 36.124125 | 29.803750 | 36.124375 | 29.803375 | 36.124375 | 431.397 | 414.53 | 1012.47 | 1011.9442 | 29.88 | 29.9378 | 35.4666 | 413.24 | 395.84 | 17.40 |
| 4 | 30.861500 | 36.154125 | 30.54200 | 36.152875 | 30.412500 | 36.150375 | 30.245500 | 36.148625 | 29.899750 | 36.150000 | 459.698 | 417.82 | 1017.40 | 1017.0594 | 30.57 | 30.6395 | 36.3900 | 442.06 | 400.40 | 41.66 |
from statsmodels.regression import linear_model
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
# Predictor columns: one water_temp/salinity pair for each of the suffixes
# 0, 2, 4, 6 and 8, ordered pair by pair.
X_col_names = [
    f'{variable}_{suffix}'
    for suffix in (0, 2, 4, 6, 8)
    for variable in ('water_temp', 'salinity')
]
def fit_model(df, pred_col):
    """Fit an OLS model of ``pred_col`` on the HYCOM predictors and print it.

    The predictors are the columns listed in the module-level ``X_col_names``.
    No constant column is added, so the model is fitted without an intercept.

    Args:
        df: DataFrame (cuDF or pandas) containing the ``X_col_names`` columns
            and ``pred_col``.
        pred_col: Name of the response column to regress on the predictors.

    Returns:
        None. The statsmodels summary is printed to stdout.
    """
    # Drop incomplete rows first, as fit_interaction_model does.
    df = df.dropna()
    X = df[X_col_names]
    y = df[[pred_col]]

    # statsmodels is fed host-side data in fit_interaction_model as well; move
    # cuDF objects to pandas, and leave objects without ``to_pandas`` untouched.
    if hasattr(X, 'to_pandas'):
        X = X.to_pandas()
    if hasattr(y, 'to_pandas'):
        y = y.to_pandas()

    model = linear_model.OLS(y, X).fit()
    print(model.summary())
def fit_interaction_model(df, pred_col):
    """Fit and print an OLS model of ``pred_col`` on interaction features.

    Rows with missing values are dropped, the ``X_col_names`` predictors are
    expanded with ``PolynomialFeatures(2, interaction_only=True,
    include_bias=False)``, and the expanded features are regressed against
    ``pred_col`` with statsmodels OLS.

    Args:
        df: cuDF DataFrame holding the ``X_col_names`` columns and ``pred_col``.
        pred_col: Name of the response column.

    Returns:
        None. The statsmodels summary is printed to stdout.
    """
    complete_rows = df.dropna()
    predictors = complete_rows[X_col_names]
    response = complete_rows[[pred_col]]

    expander = PolynomialFeatures(2, interaction_only=True, include_bias=False)
    expanded = expander.fit_transform(predictors.as_gpu_matrix())
    expanded_names = expander.get_feature_names(predictors.columns)
    expanded_df = cudf.DataFrame(expanded, columns=expanded_names)

    # statsmodels is given host-side data: a plain array for the response and
    # a pandas frame (which keeps the feature names) for the regressors.
    endog = response.to_pandas().values
    exog = expanded_df.to_pandas()
    results = linear_model.OLS(endog, exog).fit()
    # NOTE: restricting the output to terms with p-value < 0.05 was tried here
    # and is currently disabled.
    print(results.summary())
# Discard every row that still contains a missing value.
df = df.dropna()
# Predictor table: the columns listed in X_col_names.
X = df[X_col_names]
# Human-readable description (with unit) for each equinox measurement column;
# plot_correlation uses these as plot titles.
equinox_col_names = {
    'xCO2_EQU_ppm': 'Mole fraction of CO2 in the equilibrator headspace (ppm)',
    'xCO2_ATM_interpolated_ppm': 'Mole fraction of CO2 measured in dry outside air (ppm)',
    'PRES_EQU_hPa': 'Barometric pressure in the equilibrator headspace (hPa)',
    'PRES_ATM@SSP_hPa': 'Barometric pressure measured outside, corrected to sea level (hPa)',
    'TEMP_EQU_C': 'Water temperature in equilibrator (°C)',
    'SST_C': 'Sea surface temperature (°C)',
    'SAL_permil': 'Sea surface salinity on Practical Salinity Scale (ppt)',
    'fCO2_SW@SST_uatm': 'Fugacity of CO2 in sea water at SST and 100% humidity (μatm)',
    'fCO2_ATM_interpolated_uatm': 'Fugacity of CO2 in air corresponding to the interpolated xCO2 at SST and 100% humidity (μatm)',
    'dfCO2_uatm': 'Sea water fCO2 minus interpolated air fCO2 (μatm)',
}
# Expand the predictors in X into degree-2, interaction-only features
# without a bias column.
poly_features = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_inter = poly_features.fit_transform(X.as_gpu_matrix())
# Names of the expanded feature columns, derived from the column names of X.
X_inter_cols = poly_features.get_feature_names(X.columns)
# Expanded features as a cuDF DataFrame labelled with those names.
X_df = cudf.DataFrame(data=X_inter, columns=X_inter_cols)
def plot_correlation(df, hycom_col):
    """Build a two-column grid of scatter plots of ``hycom_col`` per feature.

    One decimated ``hv.Scatter`` is produced for every column named in the
    module-level ``X_inter_cols``, with that feature on the x axis and
    ``hycom_col`` on the y axis.

    Args:
        df: cuDF DataFrame containing ``hycom_col``. It must have exactly as
            many rows as the module-level ``X_df``, in the same row order.
        hycom_col: Name of the column plotted on the y axis. Its description
            in ``equinox_col_names`` is used as the plot title; the column
            name itself is used when no description exists.

    Returns:
        An ``hv.Layout`` of the plots arranged in two columns.

    Raises:
        ValueError: If ``df`` and ``X_df`` have different numbers of rows.
    """
    # X_df was built from a bare matrix and so carries a fresh 0..n-1 index,
    # while df keeps whatever labels survived earlier filtering. pd.concat
    # with axis=1 aligns by label, so both sides are re-indexed positionally
    # to pair each feature row with its own target value.
    features = X_df.to_pandas().reset_index(drop=True)
    target = df[hycom_col].to_pandas().reset_index(drop=True)
    if len(features) != len(target):
        raise ValueError(
            f'X_df has {len(features)} rows but df has {len(target)}; '
            'they must describe the same observations'
        )
    data = pd.concat([features, target], axis=1)
    hv_data = hv.Dataset(data)

    # Loop-invariant plot settings, computed once.
    title = equinox_col_names.get(hycom_col, hycom_col)
    y_range = (0, data[hycom_col].max())

    graph_list = []
    for col in X_inter_cols:
        scatter = hv.Scatter(hv_data, kdims=col, vdims=hycom_col).opts(
            width=600, height=400, title=title
        )
        # The earlier ``scatter.redim(...)`` call was dropped: its result was
        # never assigned, so it had no effect on the plot.
        decimated = decimate(scatter, y_range=y_range).opts(width=600, height=400)
        graph_list.append(decimated)

    return hv.Layout(graph_list).cols(2)
# Build the correlation grids for four of the CO2 columns described in
# equinox_col_names: the two mole-fraction columns and the two fugacity columns.
plot_correlation(df, 'xCO2_EQU_ppm')
plot_correlation(df, 'xCO2_ATM_interpolated_ppm')
plot_correlation(df, 'fCO2_SW@SST_uatm')
plot_correlation(df, 'fCO2_ATM_interpolated_uatm')